* Figure A1 - Cumulative Attrition
* Sample used to study investments and agricultural outcomes
* Last Updated, Niriksha Shetty 03/31/2016 (year originally typed as "206"; presumably 2016 - confirm)

* ---- Session setup ----
* Start from an empty workspace: drop the data in memory, Mata objects
* and matrices, then raise the variable limit and turn off -more- paging.
clear
clear mata
clear matrix
set maxvar 20000
set more off

* Print the working directory to the log, then (re)enter it.
* The original passed an undefined local macro to c(), which expanded to
* "cd c()" and failed silently under -capture-. c(pwd) is the c-class value
* that holds the current directory, so this form is the working equivalent.
* All file paths used later in this script are relative to this directory.
pwd
cap cd "`c(pwd)'"

* Drop any estimation results left over from earlier work.
estimates clear

* Load the master data set used for the rest of the script.
use "../dta/reduced-form-master.dta", clear

* evolution of balanced panel
* For every end year 2005-2013 build two 0/1 flags, using only the rows with
* mkt_year <= that end year:
*   balancedYYYY      = 1 when the (id, c_problem) group has exactly as many
*                       rows as there are years from 2005 through YYYY
*   balanced_missYYYY = 1 when the id alone has that many rows
* NOTE(review): c_problem presumably marks rows with missing/problematic
* outcome data (see the variable labels further down) - confirm.
* The flags are computed on a trimmed copy of the data and merged back onto
* the full panel held in the panel tempfile. The merge is m:1 on id mkt_year,
* so id mkt_year must uniquely identify rows.
local num 0
tempfile panel
tempfile tempdata
save `panel'
foreach i of numlist 2005/2013 {
    * num = number of years from 2005 through the current end year
    local num = `num' + 1
    keep if mkt_year <= `i'
    keep id mkt_year c_problem
    bys id c_problem: gen balanced`i' = (_N == `num')
    bys id: gen balanced_miss`i' = (_N == `num')
    save `tempdata', replace
    * Reload the full panel. -clear- is the documented option of -use-
    * (the original wrote -replace- here).
    use `panel', clear
    merge m:1 id mkt_year using `tempdata', nogen
    save `panel', replace
}

** adding in 2009 HHs
* Same construction as for the original sample, but the year count starts
* at 2008 and the flags are only defined on rows with firstt_2009 == 1
* (they are missing on every other row):
*   bal09_YYYY           = 1 when the (id, c_problem) group has exactly as
*                          many rows as there are years from 2008 through YYYY
*   balanced_miss09_YYYY = 1 when the id alone has that many rows
* The data in memory at this point becomes the new base panel.

local num 0
tempfile panel
tempfile tempdata1
save `panel'
foreach i of numlist 2008/2013 {
    * num = number of years from 2008 through the current end year
    local num = `num' + 1
    keep if mkt_year <= `i'
    keep id mkt_year c_problem firstt_2009
    bys id c_problem: gen bal09_`i' = (_N == `num') if firstt_2009 == 1
    bys id: gen balanced_miss09_`i' = (_N == `num') if firstt_2009 == 1
    save `tempdata1', replace
    * Reload the full panel. -clear- is the documented option of -use-
    * (the original wrote -replace- here).
    use `panel', clear
    merge m:1 id mkt_year using `tempdata1', nogen
    save `panel', replace
}

* ---- Row counts of balanced observations, one set of columns per end year ----
* Each egen stores, on the rows that satisfy the if-condition, the number of
* such rows with a non-missing id; all other rows are left missing.
*   FS_YYYY       rows flagged by balanced_missYYYY
*   FS_miss_YYYY  rows flagged by balancedYYYY
*   To_ Tn_ C_    rows flagged by balancedYYYY, restricted to
*                 originaltvillage == 1, firstt_2007 == 1 and Tvillage == 0
* The counter below is reset here but not read in this section.
local num 0
forvalues yr = 2005/2013 {
    egen FS_`yr' = count(id) if balanced_miss`yr' == 1
    egen FS_miss_`yr' = count(id) if balanced`yr' == 1
    egen To_`yr' = count(id) if originaltvillage == 1 & balanced`yr' == 1
    egen Tn_`yr' = count(id) if firstt_2007 == 1 & balanced`yr' == 1
    egen C_`yr' = count(id) if Tvillage == 0 & balanced`yr' == 1
}

* Same counts for the rows with firstt_2009 == 1, end years 2008-2013.
forvalues yr = 2008/2013 {
    egen FS09_`yr' = count(id) if balanced_miss09_`yr' == 1 & firstt_2009 == 1
    egen FS09_miss_`yr' = count(id) if bal09_`yr' == 1 & firstt_2009 == 1
    egen T9_`yr' = count(id) if firstt_2009 == 1 & bal09_`yr' == 1 & firstt_2009 == 1
}



* ---- Divide each row count by the number of years it spans ----
* The counts above are in rows. A flagged group contributes one row per
* year, so dividing by the number of years covered turns them into a
* per-year figure.
* Original sample: years are counted from 2005 (2005 -> 1, ..., 2013 -> 9).
forvalues yr = 2005/2013 {
    local nyears = `yr' - 2004
    foreach stub in FS_miss FS To Tn C {
        replace `stub'_`yr' = `stub'_`yr'/`nyears'
    }
}

* 2009 sample: years are counted from 2008 (2008 -> 1, ..., 2013 -> 6).
forvalues yr = 2008/2013 {
    local nyears = `yr' - 2007
    foreach stub in FS09_miss FS09 T9 {
        replace `stub'_`yr' = `stub'_`yr'/`nyears'
    }
}

* ---- Keep each year-specific count only on the rows of that same year ----
* After this step the column for year YYYY is non-missing only where
* mkt_year == YYYY, so a row total across the year columns picks out the
* value belonging to the row's own year.
forvalues yr = 2005/2013 {
    local stubs To Tn C FS FS_miss
    * The 2009-sample columns only exist from 2008 onwards.
    if `yr' >= 2008 {
        local stubs `stubs' T9 FS09 FS09_miss
    }
    foreach stub of local stubs {
        replace `stub'_`yr' = . if mkt_year != `yr'
    }
}

* ---- Collapse the year-specific columns into one column per group ----
* Only the column for the row's own year is non-missing, so the row total
* equals that year's value; rowtotal() returns 0 when every column is missing.
egen To = rowtotal(To_*)
egen Tn = rowtotal(Tn_*)
egen T9 = rowtotal(T9_*)
egen C = rowtotal(C_*)
* FS_2* rather than FS_* so the FS_miss_YYYY columns are not included.
egen FS = rowtotal(FS_2*)
egen FS_miss = rowtotal(FS_miss_*)
egen FS09 = rowtotal(FS09_2*)
* Fix: the original summed FS09_2* here as well, which made FS09_miss an
* exact copy of FS09 instead of the total of the FS09_miss_YYYY columns.
egen FS09_miss = rowtotal(FS09_miss_*)

* Constants (same value on every row) holding the largest 2009-sample value.
* NOTE(review): max() is taken over all rows and therefore over all years,
* so the same figure is added to every year from 2008 on. If a year-specific
* figure is intended this should be computed by mkt_year - confirm.
egen FS_add = max(FS09)
egen FS09_miss_add = max(FS09_miss)

* Blank the add-ons where the base series is empty. This has to run before
* the zeros in FS and FS_miss are set to missing; in the original these two
* lines came afterwards and could never match. The final FS and FS_miss are
* unaffected, because missing plus anything is missing.
replace FS_add = . if FS == 0
replace FS09_miss_add = . if FS_miss == 0

* A zero total means no value for this row, so turn it into missing.
replace To = . if To == 0
replace Tn = . if Tn == 0
replace C = . if C == 0
replace T9 = . if T9 == 0
replace FS = . if FS == 0
replace FS_miss = . if FS_miss == 0

* From 2008 on, add the 2009-sample figure to the full-sample series.
replace FS = FS + FS_add if mkt_year >= 2008
replace FS_miss = FS_miss + FS09_miss_add if mkt_year >= 2008

* Variable labels for the collapsed count series and the year variable.
label variable mkt_year "Year"

label variable FS "Full Balanced Panel (including missing outcome data)"
label variable FS_miss "Balanced Panel (excluding missing outcome data)"
label variable To "Treatment Group 1 (excluding missing outcome data)"
label variable Tn "Treatment Group 2 (excluding missing outcome data)"
label variable C "Control Group (excluding missing outcome data)"

* ---- Intended sample sizes ----
* Constants used as the reference totals in the attrition percentages
* computed further down in this script.
gen intended_To = 480
gen intended_Tn = 299
gen intended_C = 720
gen intended_T9 = 403
* 1902 = 480 + 299 + 720 + 403
gen intended_FS = 1902
gen intended_FS_miss = 1902

* 1499 = 480 + 299 + 720, i.e. the total without the 403 of intended_T9.
gen intended_FS_orig = 1499
* Fix: the original read "gen intended FS_miss_orig=1499" (a space instead
* of an underscore), which is a syntax error and never created the
* intended_FS_miss_orig variable that is referenced later.
gen intended_FS_miss_orig = 1499

* ---- Attrition: percentage shortfall relative to the intended sample ----
*   attrition_X = 100 * (intended_X - X) / intended_X
* A rate is missing wherever the underlying count X is missing.
foreach grp in To Tn C FS FS_miss T9 {
    gen attrition_`grp' = .
}

* Groups with one intended size for every year.
foreach grp in To Tn C {
    replace attrition_`grp' = ((intended_`grp' - `grp')*100)/intended_`grp' if inrange(mkt_year, 2005, 2013)
}

* Full-sample series, 2005-2007: before the 2009-sample figure is added to
* FS and FS_miss, so the *_orig totals are the reference.
* Fixes relative to the original:
*   - its loop ran over 2005/2018 and therefore overwrote the 2008-2013
*     values that had just been computed with the larger totals;
*   - attrition_FS mixed an intended_FS numerator with an intended_FS_orig
*     denominator.
replace attrition_FS = ((intended_FS_orig - FS)*100)/intended_FS_orig if inrange(mkt_year, 2005, 2007)
replace attrition_FS_miss = ((intended_FS_miss_orig - FS_miss)*100)/intended_FS_miss_orig if inrange(mkt_year, 2005, 2007)

* Full-sample series, 2008-2013: FS and FS_miss include the 2009-sample
* add-on from 2008 on, so the larger totals are the reference.
replace attrition_FS = ((intended_FS - FS)*100)/intended_FS if inrange(mkt_year, 2008, 2013)
replace attrition_FS_miss = ((intended_FS_miss - FS_miss)*100)/intended_FS_miss if inrange(mkt_year, 2008, 2013)

* 2009 sample on its own. The original created attrition_T9 but never
* filled it in.
replace attrition_T9 = ((intended_T9 - T9)*100)/intended_T9 if inrange(mkt_year, 2008, 2013)

* Labels shown in the graph legend for each attrition series.
label variable attrition_FS "Full Balanced Panel (including missing outcome data)"
label variable attrition_FS_miss "Balanced Panel (excluding missing outcome data)"
label variable attrition_To "Treatment Group 1 (excluding missing outcome data)"
label variable attrition_Tn "Treatment Group 2 (excluding missing outcome data)"
label variable attrition_C "Control Group (excluding missing outcome data)"

* Connected-line plot of the five attrition series against year, then
* export the graph to the output folder.
twoway connected attrition_FS attrition_FS_miss attrition_To attrition_Tn attrition_C mkt_year, ///
    sort ///
    ytitle("Percentage of Respondents", size(s)) ///
    graphregion(color(white)) ///
    plotregion(icolor(white)) ///
    msymbol(T D O) ///
    ylabel(0(20)100 ,angle (30)) ///
    xlabel(2005(2)2013)
graph export "../output/graphs/af1b.emf", replace
